Purpose: use paired air-stream temperature signal analysis to estimate daily groundwater contributions to streamflow

7.1 Data

7.1.1 Site information

Code
siteinfo <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_SiteInformation.csv")
siteinfo_sp <- st_as_sf(siteinfo, coords = c("long", "lat"), crs = 4326)
mapview(siteinfo_sp, zcol = "designation")

Define G-g clusters/sub-basins

Code
siteinfo2 <- siteinfo %>% 
  filter(!site_name %in% c("WoundedBuckCreek", "Brackett Creek", "South River Conway NWIS", 
                           "Shields River nr Livingston NWIS", "North Fork Flathead River NWIS", 
                           "Pacific Creek at Moran NWIS")) %>%
  mutate(designation = ifelse(site_name %in% c("Donner Blitzen River nr Frenchglen NWIS", 
                                               "BigCreekLower", "CoalCreekLower", "McGeeCreekLower", 
                                               "West Brook NWIS", "West Brook 0", 
                                               "Paine Run 10", "Staunton River 10", "Piney River 10", 
                                               "Shields River Valley Ranch", "Shields River ab Smith NWIS", 
                                               "EF Duck Creek be HF",
                                               "Spread Creek Dam"), "big", "little"))
siteinfo2 %>% arrange(region, basin, subbasin, designation) %>% kable()
site_id site_name lat long station_no designation basin region subbasin source area_sqmi elev_ft
BIG_001 BigCreekLower 48.60295 -114.18700 NA big Flathead Flat Big Creek ECOD 81.2083617 3428.6560
BIG_004 BigCreekMiddle 48.59770 -114.22500 NA little Flathead Flat Big Creek ECOD 73.5768438 3528.9551
BIG_005 BigCreekUpper 48.57493 -114.31700 NA little Flathead Flat Big Creek ECOD 53.1555778 3913.1550
BIG_006 HallowattCreekLower 48.57427 -114.31700 NA little Flathead Flat Big Creek ECOD 28.1074377 3916.4062
BIG_002 LangfordCreekLower 48.60369 -114.18700 NA little Flathead Flat Big Creek ECOD 3.9917383 3431.6910
BIG_003 LangfordCreekUpper 48.61350 -114.20200 NA little Flathead Flat Big Creek ECOD 2.8629722 3594.2884
BIG_008 NicolaCreek 48.55442 -114.37500 NA little Flathead Flat Big Creek ECOD 3.3459243 4958.1863
BIG_007 SkookoleelCreek 48.57092 -114.31200 NA little Flathead Flat Big Creek ECOD 8.5886199 3965.1819
BIG_009 WernerCreek 48.59324 -114.36800 NA little Flathead Flat Big Creek ECOD 3.9795282 4284.3249
HAL Hallowat Creek NWIS 48.61005 -114.38193 12355342 little Flathead Flat Big Creek NWIS 10.0317552 4453.6190
BIG Big Creek NWIS 48.59769 -114.22472 12355347 little Flathead Flat Big Creek NWIS 73.5899716 3528.2743
COA_001 CoalCreekLower 48.66182 -114.24200 NA big Flathead Flat Coal Creek ECOD 59.9694259 3658.9180
COA_009 CoalCreekHeadwaters 48.70570 -114.45500 NA little Flathead Flat Coal Creek ECOD 12.4254748 4595.6775
COA_006 CoalCreekMiddle 48.67442 -114.31700 NA little Flathead Flat Coal Creek ECOD 45.3748041 3874.4946
COA_003 CycloneCreekLower 48.66582 -114.24500 NA little Flathead Flat Coal Creek ECOD 12.6353670 3701.7687
COA_004 CycloneCreekMiddle 48.66871 -114.26700 NA little Flathead Flat Coal Creek ECOD 11.2380763 3853.7423
COA_005 CycloneCreekUpper 48.69030 -114.27600 NA little Flathead Flat Coal Creek ECOD 9.8960331 4039.1350
COA_002 MeadowCreek 48.65865 -114.23600 NA little Flathead Flat Coal Creek ECOD 5.7153134 3684.4886
COA_008 CoalCreekNorth 48.69130 -114.37600 NA little Flathead Flat Coal Creek ECOD 20.8457145 4154.4794
MCG_002 McGeeCreekLower 48.61789 -114.06700 NA big Flathead Flat McGee Creek ECOD 7.4164591 3711.2340
MCG_001 McGeeCreekTrib 48.62212 -114.07400 NA little Flathead Flat McGee Creek ECOD 1.2240208 3697.0725
MCG_003 McGeeCreekUpper 48.59827 -114.04000 NA little Flathead Flat McGee Creek ECOD 3.3382493 3866.2909
WM West Brook 0 42.41434 -72.62929 01171100 big West Brook Mass West Brook ECOD 11.3860459 154.5905
WBR West Brook NWIS 42.41437 -72.62876 01171100 big West Brook Mass West Brook NWIS 11.3929050 154.1473
AB Avery Brook 42.44981 -72.69402 01171000 little West Brook Mass West Brook ECOD 2.8322503 699.2518
JB Jimmy Brook 42.43485 -72.67102 01171040 little West Brook Mass West Brook ECOD 0.9743054 452.0916
MB Mitchell Brook 42.43355 -72.66819 01171080 little West Brook Mass West Brook ECOD 0.3514676 437.5512
OL Obear Brook Lower 42.43429 -72.67154 01171070 little West Brook Mass West Brook ECOD 0.5654137 465.1764
SD Sanderson Brook 42.43635 -72.68685 01171010 little West Brook Mass West Brook ECOD 1.7567871 640.4320
WL West Brook Lower 42.43187 -72.66423 01171090 little West Brook Mass West Brook ECOD 8.5098116 401.9724
WU West Brook Upper 42.43749 -72.67602 01171030 little West Brook Mass West Brook ECOD 6.3540403 516.3184
WR West Brook Reservoir 42.43738 -72.68166 01171020 little West Brook Mass West Brook ECOD 6.2101754 568.7790
WW West Whately Brook 42.45678 -72.68820 01171005 little West Brook Mass West Brook ECOD 0.4928159 686.2488
AVB Avery Brook NWIS 42.44991 -72.69355 01171000 little West Brook Mass West Brook NWIS 2.8334885 696.2354
DBF Donner Blitzen River nr Frenchglen NWIS 42.79083 -118.86750 10396000 big Donner Blitzen Oreg Donner Blitzen NWIS 204.9669881 4262.8244
DBI Donner Blitzen ab Indian NWIS 42.63743 -118.76077 423815118453900 little Donner Blitzen Oreg Donner Blitzen NWIS 60.6960528 5097.0236
IND Indian Creek NWIS 42.64180 -118.75899 423830118453200 little Donner Blitzen Oreg Donner Blitzen NWIS 19.0189031 5095.5474
LBL Little Blizten River NWIS 42.66755 -118.76015 424003118453700 little Donner Blitzen Oreg Donner Blitzen NWIS 18.5136726 5069.1385
DBB Donner Blitzen nr Burnt Car NWIS 42.72374 -118.83307 424325118495900 little Donner Blitzen Oreg Donner Blitzen NWIS 163.5486547 4514.4190
DBA Donner Blitzen ab Fish NWIS 42.76296 -118.84311 424547118503500 little Donner Blitzen Oreg Donner Blitzen NWIS 175.4902236 4340.9354
FSH Fish Creek NWIS 42.76407 -118.84232 424551118503200 little Donner Blitzen Oreg Donner Blitzen NWIS 22.2913828 4344.7481
PA_10FL Paine Run 10 38.19860 -78.79310 01627400 big Paine Run Shen Paine Run ECOD 4.8965855 1400.3952
PA_01FL Paine Run 01 38.20920 -78.75270 0162732260 little Paine Run Shen Paine Run ECOD 0.6700740 1840.9670
PA_02FL Paine Run 02 38.20730 -78.75700 0162732518 little Paine Run Shen Paine Run ECOD 0.9912138 1770.2417
PA_03FL Paine Run 03 38.20520 -78.76120 0162732782 little Paine Run Shen Paine Run ECOD 1.2384319 1708.7624
PA_04FL Paine Run 04 38.20030 -78.76560 0162733238 little Paine Run Shen Paine Run ECOD 1.5651468 1634.2663
PA_05FL Paine Run 05 38.19710 -78.76870 0162734220 little Paine Run Shen Paine Run ECOD 2.3149357 1587.6120
PA_06FL Paine Run 06 38.19740 -78.77370 01627352 little Paine Run Shen Paine Run ECOD 2.8534416 1552.5478
PA_07FL Paine Run 07 38.19560 -78.77800 01627358 little Paine Run Shen Paine Run ECOD 3.0594680 1520.2334
PA_08FL Paine Run 08 38.19420 -78.78370 0162737249 little Paine Run Shen Paine Run ECOD 3.5932435 1474.8519
PA_09FL Paine Run 09 38.19650 -78.78860 01627380 little Paine Run Shen Paine Run ECOD 4.0467174 1433.8870
PI_10FL Piney River 10 38.70130 -78.26740 0166236713 big Piney River Shen Piney River ECOD 4.8079952 1183.1180
PI_01FL Piney River 01 38.74550 -78.28180 0166235777 little Piney River Shen Piney River ECOD 0.5466476 2501.9557
PI_02FL Piney River 02 38.74250 -78.28550 0166235998 little Piney River Shen Piney River ECOD 1.1006113 2369.7393
PI_03FL Piney River 03 38.73840 -78.28930 0166236114 little Piney River Shen Piney River ECOD 1.4355099 2162.4096
PI_04FL Piney River 04 38.73410 -78.29160 0166236168 little Piney River Shen Piney River ECOD 1.8095900 2024.9685
PI_05FL Piney River 05 38.72800 -78.29160 0166236224 little Piney River Shen Piney River ECOD 2.3054298 1874.3122
PI_06FL Piney River 06 38.72570 -78.28360 0166236285 little Piney River Shen Piney River ECOD 2.6988937 1603.3088
PI_08FL Piney River 08 38.71190 -78.27720 0166236472 little Piney River Shen Piney River ECOD 3.6105641 1354.6080
PI_09FL Piney River 09 38.70450 -78.27040 0166236559 little Piney River Shen Piney River ECOD 4.2830953 1238.9199
SR_10FL Staunton River 10 38.44450 -78.37070 0166526910 big Staunton River Shen Staunton River ECOD 4.1061151 996.5581
SR_01FL Staunton River 01 38.46700 -78.41770 0166526050 little Staunton River Shen Staunton River ECOD 0.4772281 2940.0147
SR_02FL Staunton River 02 38.46300 -78.41010 0166526110 little Staunton River Shen Staunton River ECOD 0.8271327 2487.8920
SR_03FL Staunton River 03 38.45940 -78.40360 0166526165 little Staunton River Shen Staunton River ECOD 1.2444908 2199.2577
SR_04FL Staunton River 04 38.45840 -78.39950 0166526303 little Staunton River Shen Staunton River ECOD 1.9000379 2053.1192
SR_05FL Staunton River 05 38.45920 -78.38720 0166526399 little Staunton River Shen Staunton River ECOD 2.4754040 1710.9466
SR_06FL Staunton River 06 38.45650 -78.38090 0166526453 little Staunton River Shen Staunton River ECOD 2.8062837 1524.8785
SR_07FL Staunton River 07 38.45350 -78.37900 0166526484 little Staunton River Shen Staunton River ECOD 2.9037810 1432.0669
SR_08FL Staunton River 08 38.44870 -78.37820 0166526535 little Staunton River Shen Staunton River ECOD 3.1630527 1265.5245
SR_09FL Staunton River 09 38.44640 -78.37590 0166526567 little Staunton River Shen Staunton River ECOD 3.2551121 1154.3350
DU01 EF Duck Creek be HF 45.87142 -110.24438 NA big Duck Creek Shields Duck Creek ECOD 9.4495375 5342.4715
DU02 EF Duck Creek ab HF 45.87570 -110.24775 NA little Duck Creek Shields Duck Creek ECOD 5.4605992 5429.5483
DU03 Henrys Fork 45.90132 -110.24998 NA little Duck Creek Shields Duck Creek ECOD 1.6960835 5926.9079
SH07 Shields River Valley Ranch 46.16716 -110.55429 NA big Shields River Shields Shields River ECOD 53.4215153 5744.1457
SRS Shields River ab Smith NWIS 46.16716 -110.55429 06192980 big Shields River Shields Shields River NWIS 53.4215153 5744.1457
SH02 Buck Creek 46.18374 -110.38463 NA little Shields River Shields Shields River ECOD 4.4298623 6493.8012
SH03 Crandall Creek 46.18454 -110.40734 NA little Shields River Shields Shields River ECOD 2.5338355 6361.7450
SH04 Deep Creek 46.17037 -110.45686 NA little Shields River Shields Shields River ECOD 6.1986519 6142.5211
SH05 Dugout Creek 46.18431 -110.37980 NA little Shields River Shields Shields River ECOD 2.3945376 6496.1053
SH06 Lodgepole Creek 46.18146 -110.35920 NA little Shields River Shields Shields River ECOD 1.3590115 6610.3856
SH08 Shields River ab Dugout 46.18407 -110.37945 NA little Shields River Shields Shields River ECOD 8.6790373 6497.0348
DUG Dugout Creek NWIS 46.18481 -110.37964 06192900 little Shields River Shields Shields River NWIS 2.3849317 6505.1692
SP11 Spread Creek Dam 43.77230 -110.48040 NA big Snake River Snake Snake River ECOD 97.4926478 7130.2353
SP01 Grizzly Creek 43.77399 -110.23554 NA little Snake River Snake Snake River ECOD 12.6823126 8339.7503
SP02 Grouse Creek 43.74895 -110.31903 NA little Snake River Snake Snake River ECOD 5.1508608 7869.5257
SP03 Leidy Creek Lower 43.73347 -110.31434 NA little Snake River Snake Snake River ECOD 5.1718308 7958.3398
SP04 Leidy Creek Upper 43.72041 -110.37030 NA little Snake River Snake Snake River ECOD 2.1816563 8706.8560
SP05 Leidy Creek Mouth 43.73165 -110.31526 NA little Snake River Snake Snake River ECOD 5.1585521 7979.3625
SP06 NF Spread Creek Lower 43.76467 -110.32339 NA little Snake River Snake Snake River ECOD 27.9304796 7788.5460
SP07 NF Spread Creek Upper 43.77413 -110.23535 NA little Snake River Snake Snake River ECOD 3.6804425 8340.2053
SP08 Rock Creek 43.76640 -110.44859 NA little Snake River Snake Snake River ECOD 4.7413085 7285.9575
SP09 SF Spread Creek Lower 43.76430 -110.32384 NA little Snake River Snake Snake River ECOD 44.2556218 7785.6007
SP10 SF Spread Creek Upper 43.73664 -110.31387 NA little Snake River Snake Snake River ECOD 35.0605919 7937.8850
LEI Leidy Creek Mouth NWIS 43.73311 -110.31456 13012465 little Snake River Snake Snake River NWIS 5.1689622 7960.3743
SFS SF Spread Creek Lower NWIS 43.76348 -110.32379 13012475 little Snake River Snake Snake River NWIS 44.2420544 7789.9333

7.1.2 Stream temp

Code
dat <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_Raw_ECODandNWIS.csv") %>% 
  filter(site_name %in% siteinfo2$site_name) %>% 
  mutate(datetime = floor_date(datetime, unit = "hour")) %>%
  group_by(region, basin, subbasin, site_name, datetime) %>% 
  summarise(tempc = mean(tempc)) %>%
  ungroup()
tz(dat$datetime)
[1] "UTC"
Code
dat
# A tibble: 3,257,109 × 6
   region basin    subbasin  site_name      datetime            tempc
   <chr>  <chr>    <chr>     <chr>          <dttm>              <dbl>
 1 Flat   Flathead Big Creek Big Creek NWIS 2018-09-10 20:00:00    NA
 2 Flat   Flathead Big Creek Big Creek NWIS 2018-09-10 21:00:00    NA
 3 Flat   Flathead Big Creek Big Creek NWIS 2018-09-10 22:00:00    NA
 4 Flat   Flathead Big Creek Big Creek NWIS 2018-09-10 23:00:00    NA
 5 Flat   Flathead Big Creek Big Creek NWIS 2018-09-11 00:00:00    NA
 6 Flat   Flathead Big Creek Big Creek NWIS 2018-09-11 01:00:00    NA
 7 Flat   Flathead Big Creek Big Creek NWIS 2018-09-11 02:00:00    NA
 8 Flat   Flathead Big Creek Big Creek NWIS 2018-09-11 03:00:00    NA
 9 Flat   Flathead Big Creek Big Creek NWIS 2018-09-11 04:00:00    NA
10 Flat   Flathead Big Creek Big Creek NWIS 2018-09-11 05:00:00    NA
# ℹ 3,257,099 more rows
Code
ddd <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/EcoDrought_FlowTempData_Raw_ECODandNWIS.csv")
ddd %>% filter(site_name == "Jimmy Brook") %>% select(datetime, tempc) %>% dygraph() %>% dyRangeSelector() 

7.1.3 Join air temp

Apply air temperature data from a single site within each sub-basin to all water temperature/flow sites within the same sub-basin. This implicitly assumes that air temperature is homogeneous within each sub-basin, which is an oversimplification, but necessary as we only have air temperature observations at a single site within most sub-basins.

subbasin temp_site
West Brook West Brook Center
Paine Run Paine Run 10
Staunton River Staunton River 10
Piney River Piney River 10

7.1.3.1 West Brook

Code
airtemp_wb <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw Data/Mass/WestBrookCentral_AirTemp_31December2019_to_7July2024.csv")
tz(airtemp_wb$datetime_est) <- "EST"
airtemp_wb$datetime <- with_tz(airtemp_wb$datetime_est, "UTC")
airtemp_wb <- airtemp_wb %>% 
  select(datetime, tempc_air) %>% 
  mutate(datetime = floor_date(datetime, unit = "hour")) %>%
  group_by(datetime) %>% 
  summarise(tempc_air = mean(tempc_air)) %>%
  ungroup()
tz(airtemp_wb$datetime)
[1] "UTC"
Code
# join hourly stream and air temp
dat_wb <- dat %>% filter(subbasin == "West Brook") %>% left_join(airtemp_wb)
head(dat_wb)
# A tibble: 6 × 7
  region basin      subbasin   site_name  datetime               tempc tempc_air
  <chr>  <chr>      <chr>      <chr>      <dttm>                 <dbl>     <dbl>
1 Mass   West Brook West Brook Avery Bro… 2020-02-20 05:00:00  0.00556     -4.76
2 Mass   West Brook West Brook Avery Bro… 2020-02-20 06:00:00 -0.0403      -5.43
3 Mass   West Brook West Brook Avery Bro… 2020-02-20 07:00:00 -0.0403      -5.84
4 Mass   West Brook West Brook Avery Bro… 2020-02-20 08:00:00  0.00556     -6.31
5 Mass   West Brook West Brook Avery Bro… 2020-02-20 09:00:00  0.0361      -7.01
6 Mass   West Brook West Brook Avery Bro… 2020-02-20 10:00:00  0.0667      -7.76

7.1.3.2 Shenandoah

Code
airtemp_shen <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/Data/Raw data/Virg/Shen_BigG_TempEtc_hourly_UVA.csv") %>%
  rename(tempc_air = airtempc_mean) %>%
  select(site_id, datetime, tempc_air) %>%
  left_join(siteinfo %>% select(site_id, site_name, region, basin, subbasin))
tz(airtemp_shen$datetime) <- "EST"
airtemp_shen$datetime <- with_tz(airtemp_shen$datetime, "UTC")
airtemp_shen <- airtemp_shen %>% 
  mutate(datetime = floor_date(datetime, unit = "hour")) %>%
  group_by(region, basin, subbasin, site_name, datetime) %>% 
  summarise(tempc_air = mean(tempc_air)) %>%
  ungroup()
# join hourly stream and air temp
dat_shen <- dat %>% filter(region == "Shen") %>% left_join(airtemp_shen %>% select(-site_name))
head(dat_shen)
# A tibble: 6 × 7
  region basin     subbasin  site_name    datetime            tempc tempc_air
  <chr>  <chr>     <chr>     <chr>        <dttm>              <dbl>     <dbl>
1 Shen   Paine Run Paine Run Paine Run 01 2018-10-22 16:00:00  8.97      5.60
2 Shen   Paine Run Paine Run Paine Run 01 2018-10-22 17:00:00  9.28      8.19
3 Shen   Paine Run Paine Run Paine Run 01 2018-10-22 18:00:00  9.56     10.1 
4 Shen   Paine Run Paine Run Paine Run 01 2018-10-22 19:00:00  9.86     10.9 
5 Shen   Paine Run Paine Run Paine Run 01 2018-10-22 20:00:00 10.1      11.1 
6 Shen   Paine Run Paine Run Paine Run 01 2018-10-22 21:00:00 10.2      10.8 

7.1.4 Bind data

Code
dat_bind <- bind_rows(dat_wb, dat_shen)
dat_bind$datetime <- with_tz(dat_bind$datetime, "EST")
dat_bind
# A tibble: 1,391,908 × 7
   region basin      subbasin   site_name datetime               tempc tempc_air
   <chr>  <chr>      <chr>      <chr>     <dttm>                 <dbl>     <dbl>
 1 Mass   West Brook West Brook Avery Br… 2020-02-20 00:00:00  0.00556     -4.76
 2 Mass   West Brook West Brook Avery Br… 2020-02-20 01:00:00 -0.0403      -5.43
 3 Mass   West Brook West Brook Avery Br… 2020-02-20 02:00:00 -0.0403      -5.84
 4 Mass   West Brook West Brook Avery Br… 2020-02-20 03:00:00  0.00556     -6.31
 5 Mass   West Brook West Brook Avery Br… 2020-02-20 04:00:00  0.0361      -7.01
 6 Mass   West Brook West Brook Avery Br… 2020-02-20 05:00:00  0.0667      -7.76
 7 Mass   West Brook West Brook Avery Br… 2020-02-20 06:00:00  0.0667      -8.38
 8 Mass   West Brook West Brook Avery Br… 2020-02-20 07:00:00  0.0667      -8.81
 9 Mass   West Brook West Brook Avery Br… 2020-02-20 08:00:00  0.0667      -8.25
10 Mass   West Brook West Brook Avery Br… 2020-02-20 09:00:00  0.128       -6.38
# ℹ 1,391,898 more rows

7.1.5 View data

Unique sites

Code
unique(dat_bind$site_name)
 [1] "Avery Brook"          "Avery Brook NWIS"     "Jimmy Brook"         
 [4] "Mitchell Brook"       "Obear Brook Lower"    "Sanderson Brook"     
 [7] "West Brook 0"         "West Brook Lower"     "West Brook NWIS"     
[10] "West Brook Reservoir" "West Brook Upper"     "West Whately Brook"  
[13] "Paine Run 01"         "Paine Run 02"         "Paine Run 06"        
[16] "Paine Run 07"         "Paine Run 08"         "Paine Run 10"        
[19] "Piney River 10"       "Staunton River 02"    "Staunton River 03"   
[22] "Staunton River 06"    "Staunton River 07"    "Staunton River 09"   
[25] "Staunton River 10"   

Notes:

  • West Brook
    • Mitchell, Obear, and WB Lower have erroneous water temp readings after ~Aug. 31, 2021
    • All WB sites have error in F to C conversion (from Aquarius)

View single site using dyGraphs, e.g., Staunton River 10.

Code
dat_bind %>% filter(site_name %in% "Staunton River 10") %>% select(datetime, tempc, tempc_air) %>% dygraph(main = "Staunton River 10") %>% dyRangeSelector() %>% dyAxis("y", label = "Stream temp. (deg C)") %>% dyAxis("y2", label = "Air temp. (deg C)") %>% dySeries("tempc", axis = "y", label = "Stream") %>% dySeries("tempc_air", axis = "y2", label = "Air")

7.1.6 Format data for PASTA

Code
dat2 <- dat_bind %>% 
  mutate(year = year(datetime),
         yday = yday(datetime),
         hour = hour(datetime)) %>%
  rename(airTemperature = tempc_air,
         waterTemperature = tempc) %>%
  select(site_name, datetime, year, yday, hour, airTemperature, waterTemperature) %>%
  filter(waterTemperature >= 1)
range(dat2$waterTemperature)
[1]  1.00000 25.40898
Code
dat2
# A tibble: 694,325 × 7
   site_name   datetime             year  yday  hour airTemperature
   <chr>       <dttm>              <dbl> <dbl> <int>          <dbl>
 1 Avery Brook 2020-02-25 06:00:00  2020    56     6          0.121
 2 Avery Brook 2020-02-25 07:00:00  2020    56     7          0.176
 3 Avery Brook 2020-02-25 08:00:00  2020    56     8          0.563
 4 Avery Brook 2020-02-25 09:00:00  2020    56     9          1.63 
 5 Avery Brook 2020-02-25 10:00:00  2020    56    10          2.94 
 6 Avery Brook 2020-02-25 11:00:00  2020    56    11          4.26 
 7 Avery Brook 2020-02-25 12:00:00  2020    56    12          5.83 
 8 Avery Brook 2020-02-26 09:00:00  2020    57     9          2.06 
 9 Avery Brook 2020-02-26 10:00:00  2020    57    10          2.46 
10 Avery Brook 2020-02-26 11:00:00  2020    57    11          2.41 
# ℹ 694,315 more rows
# ℹ 1 more variable: waterTemperature <dbl>

7.2 Create functions

7.2.1 Curve fits

Functions to model air and water temperature time series data using sine wave regression:

Code
curve_fit_air <- function(d, minDataLength = 20) {
  # return NAs if not enough data
  if (length(d$airTemperature) < minDataLength) { return(list(model = NA, rSquared = NA)) }
  # convert hours to radians/circular
  d$hour_rad <- d$hour * (2 * pi / 24)
  # starting values
  startAir <- list(A = -1, B = -1, C = mean(d$airTemperature, na.rm = TRUE))
  # run sine wave regression as an NLS model
  modelAir <- tryCatch({
    nlsLM(airTemperature ~ A * sin(hour_rad) + B * cos(hour_rad) + C, data = d, start = startAir)
    }, error = function(e) {
      return(list(model = NA, rSquared = NA))
      })
  # rSquared calculation
  residuals <- residuals(modelAir)
  sst <- sum((d$airTemperature - mean(d$airTemperature))^2)
  ssr <- sum(residuals^2)
  rSquared <- 1 - (ssr / sst)
  # return fitted model object and R2
  return(list(model = modelAir, rSquared = rSquared))
  }

curve_fit_water <- function(d, minDataLength = 20) {
  # return NAs if not enough data
  if (length(d$waterTemperature) < minDataLength) { return(list(model = NA, rSquared = NA)) }
  # convert hours to radians/circular
  d$hour_rad <- d$hour * (2 * pi / 24)
  # starting values
  startWater <- list(A = -1, B = -1, C = mean(d$waterTemperature, na.rm = TRUE))
  # run sine wave regression as an NLS model
  modelWater <- tryCatch({
    nlsLM(waterTemperature ~ A * sin(hour_rad) + B * cos(hour_rad) + C, data = d, start = startWater)
    }, error = function(e) {
      return(list(model = NA, rSquared = NA))
      })
  # rSquared calculation
  residuals <- residuals(modelWater)
  sst <- sum((d$waterTemperature - mean(d$waterTemperature))^2)
  ssr <- sum(residuals^2)
  rSquared <- 1 - (ssr / sst)
  # return fitted model object and R2
  return(list(model = modelWater, rSquared = rSquared))
  }

7.2.2 Extract parameters

Function to extract model parameters from sine wave regression fits:

Code
extract_params <- function(model) {
  if (is.null(model)) {
    return(NULL)
  }

  params <- tryCatch({
    broom.mixed::tidy(model)
  }, error = function(e) {
    NULL
  })

  return(params)
}

7.2.3 Get model parameters

Function to fit sine wave regression models to data and grab amplitude, phase, and mean temperature parameters from fitted model objects:

Code
getParams <- function(dtHOUR, minDataLength = 20) {
  # Models for air temperature
  modelsAir <- dtHOUR %>%
    group_by(site_name, year, yday) %>%
    nest() %>%
    mutate(dataLength = map_dbl(data, ~length(.x$airTemperature))) |>
    filter(dataLength > minDataLength) |> # filter out daily datasets that are too short
    mutate(
      model0 = map(data, curve_fit_air),
      model = map(model0, 'model'),
      rSquared = map(model0, 'rSquared'),
      params = map(model, extract_params)
    ) %>%
    unnest(c(params, rSquared)) |>
    select(-model0, -model, -data) |>
    mutate(tempVar = "air")

  # Models for water temperature
  modelsWater <- dtHOUR %>%
    group_by(site_name, year, yday) %>%
    nest() %>%
    mutate(dataLength = map_dbl(data, ~length(.x$waterTemperature))) |>
    filter(dataLength > minDataLength) |> # filter out daily datasets that are too short
    mutate(
      model0 = map(data, curve_fit_water),
      model = map(model0, 'model'),
      rSquared = map(model0, 'rSquared'),
      params = map(model, extract_params)
    ) %>%
    unnest(c(params, rSquared)) |>
    select(-model0, -model, -data) |>
    mutate(tempVar = "water")

  # Collect air and water models
  models <- bind_rows(modelsAir, modelsWater) |>
    filter(!is.na(term)) |>
    #group_by(site_name, year, yday, tempVar) |>
    select(-std.error, -statistic, -p.value) |> # need to lose these columns so there are no unique values remaining in non-widened cols
    pivot_wider(names_from = term, values_from = estimate)
  
  # Calculate amplitude and phase for air and water data
  params <- models |>
    group_by(site_name, year, yday, tempVar) |>
    mutate(
      amplitude = sqrt(A^2 + B^2),
      phase = ifelse(A < 0,
        12 + (24 / (2 * pi)) * atan(B / A), #switched order of A and B per Tim's email 5/17/24
        (24 / (2 * pi)) * atan(B / A)),
      mean = C
    ) |>
    left_join(
      dtHOUR |> select(site_name, year, yday) |> distinct(), by = c("site_name", "year", "yday")
    ) |>
    ungroup()
  
  # Return amplitude and phase parameters
  return(params)
}

7.2.4 Get derived parameters

Function to calculate amplitude ratio, phase lag, and mean ratio parameters from sine wave regression models fits to daily paired air-stream temperature data:

Code
getAmpPhase <- function(paramsIn) {
  paramsIn |>
    select(site_name, year, yday, tempVar, amplitude, phase, mean, rSquared) |>
    pivot_wider(
      names_from = tempVar,
      values_from = c(amplitude, phase, mean, rSquared)
    ) |>
    select(
      site_name, year, yday,
      starts_with("amplitude"),
      starts_with("phase"),
      starts_with("mean"),
      starts_with("rSquared")
    ) |>
    mutate(
      amplitudeRatio = amplitude_water / amplitude_air,
      phaseLag = phase_water - phase_air,
      meanRatio = mean_water / mean_air
    )
}

7.2.5 Predictions

Function to predict air or water temperature from fitted models:

Code
getParamsPred <- function(paramsIn) {

  uniqueValues <- paramsIn |>
    distinct(site_name, year, yday, tempVar)

  preds <- crossing(uniqueValues, hour = 0:23) %>%
    left_join(paramsIn, by = c("site_name", "year", "yday", "tempVar")) %>%
    mutate(
      hour_rad = hour * (2 * pi / 24),
      predTemp = A * sin(hour_rad) + B * cos(hour_rad) + C
    )

    return(preds)
 }

7.3 Perform PASTA

7.3.1 Get model parameters

Code
pasta <- getParams(dtHOUR = dat2)
pasta
# A tibble: 56,658 × 12
   site_name    year  yday dataLength rSquared tempVar      A     B     C
   <chr>       <dbl> <dbl>      <dbl>    <dbl> <chr>    <dbl> <dbl> <dbl>
 1 Avery Brook  2020    64         23    0.487 air     -0.675 -1.74  3.96
 2 Avery Brook  2020    69         24    0.894 air     -6.97  -6.42  7.00
 3 Avery Brook  2020    70         24    0.912 air     -4.60  -3.64  8.89
 4 Avery Brook  2020    71         24    0.426 air     -1.66  -1.86  5.60
 5 Avery Brook  2020    72         24    0.924 air     -1.97  -1.18  3.19
 6 Avery Brook  2020    73         24    0.730 air     -1.85  -2.11  4.44
 7 Avery Brook  2020    74         24    0.862 air     -2.27  -2.55  3.75
 8 Avery Brook  2020    75         23    0.640 air     -1.48  -2.95  3.18
 9 Avery Brook  2020    77         24    0.838 air     -2.47  -1.14  1.29
10 Avery Brook  2020    78         24    0.894 air     -4.71  -3.01  4.65
# ℹ 56,648 more rows
# ℹ 3 more variables: amplitude <dbl>, phase <dbl>, mean <dbl>

Write to file

Code
write_csv(pasta, "C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Covariates/pasta_daily_parameters.csv")

Read in

Code
pasta <- read_csv("C:/Users/jbaldock/OneDrive - DOI/Documents/USGS/EcoDrought/EcoDrought Working/EcoDrought-Analysis/Covariates/pasta_daily_parameters.csv")

7.3.2 Get derived parameters

Code
pasta_derived <- getAmpPhase(paramsIn = pasta)
pasta_derived <- pasta_derived %>% mutate(date = parse_date_time(x = paste(year, yday), orders = "yj")) %>% relocate(date, .after = site_name)
pasta_derived
# A tibble: 28,527 × 15
   site_name   date                 year  yday amplitude_air amplitude_water
   <chr>       <dttm>              <dbl> <dbl>         <dbl>           <dbl>
 1 Avery Brook 2020-03-04 00:00:00  2020    64          1.86           0.348
 2 Avery Brook 2020-03-09 00:00:00  2020    69          9.48           0.783
 3 Avery Brook 2020-03-10 00:00:00  2020    70          5.86           0.613
 4 Avery Brook 2020-03-11 00:00:00  2020    71          2.50           0.408
 5 Avery Brook 2020-03-12 00:00:00  2020    72          2.30           0.203
 6 Avery Brook 2020-03-13 00:00:00  2020    73          2.81           0.335
 7 Avery Brook 2020-03-14 00:00:00  2020    74          3.41           0.509
 8 Avery Brook 2020-03-15 00:00:00  2020    75          3.30           0.616
 9 Avery Brook 2020-03-17 00:00:00  2020    77          2.72           0.398
10 Avery Brook 2020-03-18 00:00:00  2020    78          5.59           0.709
# ℹ 28,517 more rows
# ℹ 9 more variables: phase_air <dbl>, phase_water <dbl>, mean_air <dbl>,
#   mean_water <dbl>, rSquared_air <dbl>, rSquared_water <dbl>,
#   amplitudeRatio <dbl>, phaseLag <dbl>, meanRatio <dbl>

7.3.3 View derived time series

Code
unique(pasta_derived$site_name)
 [1] "Avery Brook"          "Jimmy Brook"          "Mitchell Brook"      
 [4] "Obear Brook Lower"    "Sanderson Brook"      "West Brook 0"        
 [7] "West Brook Lower"     "West Brook Reservoir" "West Brook Upper"    
[10] "West Whately Brook"   "Paine Run 01"         "Paine Run 02"        
[13] "Paine Run 06"         "Paine Run 07"         "Paine Run 08"        
[16] "Paine Run 10"         "Piney River 10"       "Staunton River 02"   
[19] "Staunton River 03"    "Staunton River 06"    "Staunton River 07"   
[22] "Staunton River 09"    "Staunton River 10"   

Set focal site

Code
focal_site <- "Staunton River 10"

View derived parameters

Code
pasta_derived %>% filter(site_name == focal_site)
# A tibble: 3,563 × 15
   site_name       date                 year  yday amplitude_air amplitude_water
   <chr>           <dttm>              <dbl> <dbl>         <dbl>           <dbl>
 1 Staunton River… 2014-03-27 00:00:00  2014    86          3.19           1.90 
 2 Staunton River… 2014-03-28 00:00:00  2014    87          5.61           1.64 
 3 Staunton River… 2014-03-29 00:00:00  2014    88          2.16           0.564
 4 Staunton River… 2014-03-30 00:00:00  2014    89          1.64           0.708
 5 Staunton River… 2014-03-31 00:00:00  2014    90          8.03           2.10 
 6 Staunton River… 2014-04-01 00:00:00  2014    91          6.14           1.41 
 7 Staunton River… 2014-04-02 00:00:00  2014    92          8.38           1.54 
 8 Staunton River… 2014-04-03 00:00:00  2014    93          4.67           1.04 
 9 Staunton River… 2014-04-04 00:00:00  2014    94          3.69           0.920
10 Staunton River… 2014-04-05 00:00:00  2014    95          4.13           0.687
# ℹ 3,553 more rows
# ℹ 9 more variables: phase_air <dbl>, phase_water <dbl>, mean_air <dbl>,
#   mean_water <dbl>, rSquared_air <dbl>, rSquared_water <dbl>,
#   amplitudeRatio <dbl>, phaseLag <dbl>, meanRatio <dbl>

Phase lag: phase_water - phase_air

Code
pasta_derived %>% filter(site_name == focal_site, rSquared_air >= 0.7 & rSquared_water >= 0.7) %>% select(date, phaseLag) %>% dygraph(main = focal_site) %>% dyRangeSelector() %>% dyAxis("y", label = "Phase lag (hrs)") %>% dyOptions(drawPoints = T, strokeWidth = 0, pointSize = 3) %>% dyLimit(0, color = "red") %>% dyHighlight(highlightCircleSize = 5)

Amplitude ratio: amplitude_water - amplitude_air

Code
pasta_derived %>% filter(site_name == focal_site, rSquared_air >= 0.7 & rSquared_water >= 0.7) %>% select(date, amplitudeRatio) %>% dygraph(main = focal_site) %>% dyRangeSelector() %>% dyAxis("y", label = "Amplitude ratio") %>% dyOptions(drawPoints = T, strokeWidth = 0, pointSize = 3) %>% dyHighlight(highlightCircleSize = 5)

PhaseLag ~ AmplitudeRatio, year = 2020

Code
pasta_derived %>% filter(site_name == focal_site, year == 2020, rSquared_air >= 0.7 & rSquared_water >= 0.7, phaseLag < 5) %>% ggplot() + geom_point(aes(x = amplitudeRatio, y = phaseLag)) + ggtitle(focal_site)

7.3.4 Observed vs. Predicted

Predict air and stream temperature from fitted sine wave regression models

Code
preds <- getParamsPred(paramsIn = pasta)
preds <- preds %>% mutate(datetime = parse_date_time(x = paste(year, yday, hour), orders = "yjh")) %>% relocate(datetime, .after = site_name) %>% select(site_name, datetime, tempVar, rSquared, predTemp) %>% pivot_wider(names_from = tempVar, values_from = c(predTemp, rSquared))
tz(preds$datetime) <- "EST"

Plot predicted air and stream temperature (orange and blue lines, respectively) over observed air and stream temperature (orange and blue points, respectively).

Code
dat2 %>% 
  select(site_name, datetime, airTemperature, waterTemperature) %>% 
  filter(site_name == focal_site) %>% 
  left_join(preds) %>% filter(site_name == focal_site, rSquared_air >= 0.7 & rSquared_water >= 0.7) %>% 
  select(-c(site_name, rSquared_air, rSquared_water)) %>% 
  mutate(datetime = datetime + hours(2)) %>% # need to fudge datetime b/c dyGraphs converts to local time zone
  dygraph(main = focal_site) %>% dyRangeSelector() %>% 
  dyAxis("y", label = "Water temp.") %>% 
  dyAxis("y2", label = "Air temp.") %>% 
  dySeries("waterTemperature", axis = "y", label = "Obs. water temp.", drawPoints = TRUE, strokeWidth = 0, pointSize = 3, color = "dodgerblue") %>% 
  dySeries("airTemperature", axis = "y2", label = "Obs. air temp.", drawPoints = TRUE, strokeWidth = 0, pointSize = 3, color = "darkorange")  %>% 
  dySeries("predTemp_water", axis = "y", label = "Pred. water temp.", color = "dodgerblue") %>% 
  dySeries("predTemp_air", axis = "y2", label = "Pred. air temp.", color = "darkorange")